/* 
This file takes 3_firm_mn.dta and does a bunch of merges & adds variables 

*/

* Start from an empty session. These first commands run before the delimiter switch, so they end at the newline
clear
set matsize 100
set more off 
* From here on every command, and every star comment, runs until the next semicolon
# delimit;
* Set directory; 

* NOTE(review): hard-coded, user-specific path - all relative paths below resolve against it;
cd "~/Desktop/migrec_replication/do/";


* Load the input dataset named in the file header;
use  ../dta_secure/3_firm_mn.dta, clear;

**************************************************************************************;
* Basic cleaning;
**************************************************************************************;


* create placebo treatment ;
* placebo post period starts in 2007 - placebo eligibility is the agency total of migrant_win over 2005-2006;
	generate post_pbo2007 = (year >= 2007);

	bysort agency_id: egen elig_migs_pbo2007_tmp = total(migrant_win) if inlist(year, 2005, 2006), missing;
	bysort agency_id: egen elig_migs_pbo2007 = max(elig_migs_pbo2007_tmp);
	drop elig_migs_pbo2007_tmp;

* treated = at least 100 eligible migrants, left missing when the 2005-2006 total is missing;
	generate treat_pbo2007 = (elig_migs_pbo2007 >= 100) if elig_migs_pbo2007 != .;

* interactions with the placebo post dummy;
	generate post_treat_pbo2007 = post_pbo2007 * treat_pbo2007;
	generate post_elig_migs_pbo2007 = post_pbo2007 * elig_migs_pbo2007;

* restricted-sample flags around the threshold;
	generate rest_sample_pbo2007 = (elig_migs_pbo2007 <= 200);
	generate rest_sample_small_pbo2007 = (elig_migs_pbo2007 > 50 & elig_migs_pbo2007 <= 150);
	generate rest_sample_mid_pbo2007 = (elig_migs_pbo2007 > 50 & elig_migs_pbo2007 <= 200);
	
* create baseline migrant_wins for main intervention in years 2007 and 2008;
* agency total of migrant_win over 2007-2008 (the m option keeps the total missing when every input is missing), then copied to all rows of the agency;
	bys agency_id: egen baseline_migs_tmp = total(migrant_win) if inlist(year, 2007,2008),m ;
	bys agency_id: egen baseline_migs = max(baseline_migs_tmp );
	la var baseline_migs "Baseline total migrant_wins in 2007 and 2008 prior to evaluation period";
	drop baseline_migs_tmp ;
	
* create baseline migrants for placebo intervention in years 2005 and 2006;
* same construction as elig_migs_pbo2007 (agency total of migrant_win over 2005-2006);

	bys agency_id: egen pbo_baseline_migs_tmp = total(migrant_win) if inlist(year, 2005,2006),m ;
	bys agency_id: egen pbo_baseline_migs = max(pbo_baseline_migs_tmp );
	la var pbo_baseline_migs "Baseline total migrants in 2005 and 2006 prior to placebo treatment";
	drop pbo_baseline_migs_tmp ;
	
* NOTE(review): the placebo interaction below uses post, not post_pbo2007 - confirm this is intended;
	g pbo_post_baseline_migs = post * pbo_baseline_migs ;
	
	g post_baseline_migs = post * baseline_migs ;

	
* generate another threshold;
* elig_migs and post are expected to exist already in the loaded data;
	g rest_sample_mid = (elig_migs <=200 & elig_migs > 50);
	la var rest_sample_mid "Restricted sample - 50 and 200 migrant_wins in 2009/10";

	* take care of 0s in revenues; 
	* overwrites the existing log_salary with log(1 + usd_salary_win) so that zeros map to 0 instead of missing;
	replace log_salary = log(usd_salary_win+1);
	
* distribution of ratings;
* agency mean of each complaint-rate measure over the 2009-2010 assessment years, copied to every row of the agency; 
	foreach v in comp inci_comp {;
		bysort agency_id: egen mean_`v'_rate_9_10_tmp = mean(`v'_int) if inlist(year, 2009, 2010);
		bysort agency_id: egen mean_`v'_rate_9_10 = max(mean_`v'_rate_9_10_tmp);
		drop mean_`v'_rate_9_10_tmp;
	};
	label variable mean_comp_rate_9_10 "Complaint rate in assessment years 09/10 by departure year";
	label variable mean_inci_comp_rate_9_10 "Complaint rate in assessment years 09/10 by incident year";

* stars are set to 0 for ineligible firms and for eligible firms without a recorded star;
	replace agency_star_2012 = 0 if elig_firm == 0 | (elig_firm == 1 & agency_star_2012 == .);

	label define starz 0 "0-star" 1 "1-star" 2 "2-star" 3 "3-star"  4 "4-star" 5 "5-star", modify;
	label values agency_star_2012 starz;
	
	
*variables for good/bad ratings; 
* good = at least one star, bad = eligible firm with zero stars;

	generate good_rating = (agency_star_2012 != . & agency_star_2012 >= 1);
	generate bad_rating = (elig_firm == 1 & agency_star_2012 == 0);

* interactions with the main and placebo post dummies;
	generate good_rating_post = good_rating * post;
	generate bad_rating_post = bad_rating * post;

	generate good_rating_post_pbo = good_rating * post_pbo2007;
	generate bad_rating_post_pbo = bad_rating * post_pbo2007;

* post_2009 switches on from 2010 onward (year strictly above 2009);
	generate post_2009 = (year > 2009);
	foreach v in elig_firm good_rating bad_rating {;
		generate `v'_post_2009 = (`v' * post_2009);
	};


	generate exist2009 = (agency_license_yr < 2009);
	
*variables for very good ratings (3 stars or more); 


	gen vgood_rating = (agency_star_2012>2 & agency_star_2012 != .); 
* fix - the interaction must use vgood_rating (it previously reused good_rating, making it a copy of good_rating_post);
	gen vgood_rating_post = vgood_rating*post; 

	* good2 = rated 1 or 2 stars. Fix - the lower bound was missing, so 0-star and unrated firms (star set to 0) were flagged too and the dummy overlapped bad_rating inside cat4;
	gen good2_rating = (agency_star_2012 >= 1 & agency_star_2012 < 3); 
	gen good2_rating_post = good2_rating*post; 
	
	* regressor lists for the 3-category and 4-category rating specifications;
	global cat3 "good_rating bad_rating post good_rating_post bad_rating_post";
	global cat4 "vgood_rating good2_rating bad_rating post vgood_rating_post good2_rating_post bad_rating_post";

* gen types ;
* string category per row. The replaces run in order, so a later match overwrites an earlier one (bad wins over good);
	gen types = "";
	replace types = "very good" if vgood_rating == 1 & elig_firm == 1; 
	replace types = "good" if good2_rating == 1 & elig_firm == 1; 
	replace types = "bad" if bad_rating == 1 & elig_firm == 1; 
	replace types = "unrated" if elig_firm == 0;
* rows that matched none of the categories are removed from the dataset here;
	drop if types == "";

* gen types 3-cat; 
* same idea with good and very good pooled into a single good category;

	gen types_3cat = "";
	replace types_3cat = "good" if good_rating == 1 & elig_firm == 1; 
	replace types_3cat = "bad" if bad_rating == 1 & elig_firm == 1; 
	replace types_3cat = "unrated" if elig_firm == 0;
	drop if types_3cat == "";

* baseline migs buckets - 08;
* recipe used three times below: mark the rows of the base year whose migrant count falls in a band, then lift the mark to an agency-level 0/1 dummy;

	gen d_mig08_50 = 1 if migrant <= 50 & year == 2008;
	gen d_mig08_50_100 = 1 if migrant > 50 & migrant <= 100 & year == 2008;
	gen d_mig08_100_200 = 1 if migrant > 100 & migrant <= 200 & year == 2008;
	gen d_mig08_200 = 1 if migrant > 200 & migrant != . & year == 2008;

	foreach band in mig08_50 mig08_50_100 mig08_100_200 mig08_200 {;
		bysort agency_id: egen dum_`band' = max(d_`band');
		replace dum_`band' = 0 if dum_`band' == .;
	};

	drop d_mig08*;

* finer 2008 bands, 50 wide up to 450, with an open band above 450 - here the agency dummies carry no prefix;
	local bands "mig08_50";
	gen t_mig08_50 = 1 if migrant <= 50 & year == 2008;
	forvalues lo = 50(50)400 {;
		local hi = `lo' + 50;
		gen t_mig08_`lo'_`hi' = 1 if migrant > `lo' & migrant <= `hi' & year == 2008;
		local bands "`bands' mig08_`lo'_`hi'";
	};
	gen t_mig08_450 = 1 if migrant > 450 & migrant != . & year == 2008;
	local bands "`bands' mig08_450";

	foreach band of local bands {;
		bysort agency_id: egen `band' = max(t_`band');
		replace `band' = 0 if `band' == .;
	};

	drop t_mig08*;


* baseline migs buckets - 05;

	gen d_mig05_50 = 1 if migrant <= 50 & year == 2005;
	gen d_mig05_50_100 = 1 if migrant > 50 & migrant <= 100 & year == 2005;
	gen d_mig05_100_200 = 1 if migrant > 100 & migrant <= 200 & year == 2005;
	gen d_mig05_200 = 1 if migrant > 200 & migrant != . & year == 2005;

	foreach band in mig05_50 mig05_50_100 mig05_100_200 mig05_200 {;
		bysort agency_id: egen dum_`band' = max(d_`band');
		replace dum_`band' = 0 if dum_`band' == .;
	};

	drop d_mig05*;
	
* zero migrants; 
* each indicator is left missing when its source count is missing;

	generate zero_migs = (migrant == 0) if migrant != .;

	generate atleast1_migs = (migrant > 0) if migrant != .;

* number of zero-migrant rows within each year-by-type cell;
	bysort year types: egen num_zero_migs = total(zero_migs);

	generate zero_women = (women == 0) if women != .;

* share of migrants going to the five listed destinations, set to 0 when no migrants were sent;
	egen mig_cn_top5 = rowtotal(saudi qatar uae kuwait jordan), missing;
	generate mig_share_top5 = cond(migrant == 0, 0, mig_cn_top5/migrant);

* zero job orders;

	generate zero_jo = (num_job_orders == 0) if num_job_orders != .;

	generate atleast1_jo = (num_job_orders > 0) if num_job_orders != .;
	
* complaint type variables;
* complaints of each type per migrant sent (missing when migrant is 0 or missing);

	g harass_int = harass / migrant; 
* NOTE(review): breach_int adds nonpay to breach in the numerator - confirm this is intended;
	g breach_int = (breach +nonpay) / migrant; 
	g nonpay_int = nonpay / migrant; 

* where the overall complaint rate is 0, a missing type-specific rate is set to 0;
	foreach x in harass breach nonpay /*sickness nocomm*/ {;
		replace `x'_int= 0 if comp_int == 0 & `x'_int==.;
	};
	
	

* clean up outliers;

	* pctl99 holds the overall 99th percentile of migrant on every row;
	egen pctl99 = pctile(migrant), p(99);
	g migrant_t99 = migrant ;
	* trimmed copy - rows above the 99th percentile become missing;
	replace migrant_t99 = . if migrant>pctl99;

	* the share variables are blanked on the same rows as the trimmed migrant count;
	g mig_share_women_t99 = mig_share_women ;
	replace mig_share_women_t99 = . if migrant_t99 == .;

	g mig_share_low_skill_t99 = mig_share_low_skill ;
	replace mig_share_low_skill_t99 = . if migrant_t99 == .;

	* NOTE(review): 2543 is hard-coded - presumably the 99.5th percentile given the variable name, confirm against the data;
	g mig_t995 = migrant if migrant <=2543; 

	* NOTE(review): winsor is not a built-in Stata command - presumably the community-contributed package, which must be installed first;
	winsor migrant, p(0.01) highonly gen(migrant_w99);

	g post_elig_migs = elig_migs * post;
	
	
*merge in report data; 


merge m:1 license_no using ../dta_secure/report_ratings2012.dta, gen(reportmerge); 

* keep master-only and matched rows, i.e. drop rows that exist only in the ratings file;
keep if reportmerge == 1 | reportmerge == 3; 


* clean performance rate; 
* the two values 297 and 354 are rescaled by 10 (presumably a dropped decimal point - confirm against the source report);

replace performance_rate = (performance_rate/10) if performance_rate == 297 | performance_rate == 354; 


* fix - the interaction is now built AFTER the cleaning step, it previously inherited the uncleaned 297 and 354 values;
gen perf_post = performance_rate*post;


* star cutoffs and interactions; 
* bands use the integer cutoffs 20, 35, 50, 60 and 80, the same cutoffs as the RD windows built from performance_rate in this file;
* fix - the previous x.9 bounds overlapped (a score between 34.9 and 35 was both one and two star), left exactly 19.9 unclassified, and compared float data with decimal literals that floats cannot store exactly;
* a missing performance_rate gives 0 in every band because each band has a finite upper bound;


gen zero_star = (performance_rate < 20); 
gen one_star = (performance_rate >= 20 & performance_rate < 35); 
gen two_star = (performance_rate >= 35 & performance_rate < 50); 
gen three_star = (performance_rate >= 50 & performance_rate < 60); 
gen four_star = (performance_rate >= 60 & performance_rate < 80); 


* band-specific slopes in the score;
gen one_perf = one_star*performance_rate; 
gen two_perf = two_star*performance_rate; 
gen three_perf = three_star*performance_rate; 
gen four_perf = four_star*performance_rate; 



* variables for a composite rd with non-overlapping windows; 
* base_i is 1 (else missing) when the score lies within i points of any of the cutoffs 20, 35, 50, 60, 80;
* NOTE(review): the windows do overlap for wider bandwidths (the 50 and 60 windows touch from i = 5, the 20 and 35 windows from i = 8) - confirm this is acceptable;

foreach i of numlist 2/10 {; 

gen base_`i' = 1 if performance_rate >= 20-`i' & performance_rate <= 20+`i'| 
performance_rate >= 35-`i' & performance_rate <= 35+`i' |
performance_rate >= 50-`i' & performance_rate <= 50+`i' | 
performance_rate >= 60-`i' & performance_rate <= 60+`i' | 
performance_rate >= 80-`i' & performance_rate <= 80+`i' ;
};


* treat_i is 1 at or above a cutoff (within i points), 0 elsewhere inside the base window, missing outside it;
foreach i of numlist 2/10 {; 

gen treat_`i' = 1 if performance_rate >= 20 & performance_rate <= 20+`i'| 
performance_rate >= 35 & performance_rate <= 35+`i' |
performance_rate >= 50 & performance_rate <= 50+`i' | 
performance_rate >= 60 & performance_rate <= 60+`i' | 
performance_rate >= 80 & performance_rate <= 80+`i' ;

replace treat_`i' = 0 if base_`i' == 1 & treat_`i' == . ;

* slope shift above the cutoff;
gen treat_perf_`i' = treat_`i'*performance_rate; 


};

* polynomial terms of the score, powers 2 to 4;
forvalues p = 2/4 {;
	gen performance_rate_`p' = performance_rate^`p';
};



* produce dummies and slopes for cutoffs  ;
* cutoff_k_i is 1 when the score is within i points of the k-th cutoff (20, 35, 50, 60, 80), and cutoff_k_i_perf is its interaction with the score;

foreach i of numlist 2/10 {;

	local k = 0;
	foreach c of numlist 20 35 50 60 80 {;
		local ++k;
		gen cutoff_`k'_`i' = (performance_rate >= `c'-`i' & performance_rate <= `c'+`i');
	};

	forvalues k = 1/5 {;
		gen cutoff_`k'_`i'_perf =  cutoff_`k'_`i'*performance_rate;
	};

};



** other vars; 

* log of one plus migrants, so zero counts are kept;
generate log_migrant = log(migrant+1);


** other treatment variables; 
* split at the median score of eligible firms, taken over the 2012 rows;

summarize performance_rate if year == 2012 & elig_firm == 1, detail;

generate abmed_rating = (performance_rate > `r(p50)' & performance_rate != .);
generate blmed_rating = (performance_rate <= `r(p50)' & performance_rate != .);

foreach side in abmed blmed {;
	generate `side'_post = `side'_rating * post;
};

* star-band dummies interacted with post;
foreach s in zero one two three four {;
	generate `s'_star_post = `s'_star*post;
};


** variables for cem ; 
* agency means over 2005-2009 - the z variable holds the mean on in-window rows only, the s variable copies it to every row of the agency;

local cem_yrs "2005,2006,2007,2008,2009";

bysort agency_id: egen mean_migz = mean(migrant) if inlist(year,`cem_yrs');
bysort agency_id: egen mean_migs = max(mean_migz);

bysort agency_id: egen mean_compz = mean(comp_int) if inlist(year,`cem_yrs');
bysort agency_id: egen mean_comps = max(mean_compz);

bysort agency_id: egen mean_revz = mean(usd_salary_win) if inlist(year,`cem_yrs');
bysort agency_id: egen mean_revs = max(mean_revz);

bysort agency_id: egen mean_joz = mean(num_job_orders) if inlist(year,`cem_yrs');
bysort agency_id: egen mean_jos = max(mean_joz);

bysort agency_id: egen mean_dwz = mean(domestic_worker_share) if inlist(year,`cem_yrs');
bysort agency_id: egen mean_dws = max(mean_dwz);


** complaint heterogeneity;

* agency mean complaint intensity over 2005-2008, copied to every row of the agency;
bys agency_id: egen comp_avgz = mean(comp_int)  if inlist(year,2005,2006,2007,2008);

bys agency_id: egen comp_avgs = max(comp_avgz); 


* median taken over the 2006 rows;
sum comp_avgs if year == 2006, det; 

* fix - Stata ranks missing above every number, so without the guard agencies with no 2005-2008 complaint data were coded as above median. They are now left missing;
gen abmed_comp = (comp_avgs > `r(p50)') if !missing(comp_avgs); 

gen good_above = good_rating*abmed_comp; 

gen bad_above = bad_rating*abmed_comp; 

gen good_above_post = good_above*post; 

gen bad_above_post = bad_above*post; 

gen abmed_comp_post = abmed_comp*post; 

gen good_above_post_2009 = good_above*post_2009; 

gen bad_above_post_2009 = bad_above*post_2009; 

gen abmed_comp_post_2009 = abmed_comp*post_2009; 



gen elig_above = elig_firm*abmed_comp; 
gen elig_above_post_2009 = elig_above*post_2009; 

gen elig_above_post = elig_above*post; 



gen elig_firm_post = elig_firm*post; 

* migrant_YY = agency total of migrant over the rows of year 20YY, copied to every row of the agency;
foreach yy in 09 08 10 07 06 {;
	gen migrantz_`yy' = migrant if year == 20`yy';
	bysort agency_id : egen migranty_`yy' = sum(migrantz_`yy');
	bysort agency_id : egen migrant_`yy' = max(migranty_`yy');
	drop migrantz_`yy' migranty_`yy';
};



** heterogeneity by solved rate; 


* agency mean solved rate over 2005-2008, copied to every row of the agency;
bys agency_id: egen solv_avgz = mean(solved_rate)  if inlist(year,2005,2006,2007,2008);

bys agency_id: egen solv_avgs = max(solv_avgz); 




* median taken over the 2006 rows;
sum solv_avgs if year == 2006, det; 
* fix - guard against missing, which Stata ranks above every number (it was previously coded as above median);
gen abmed_solv = (solv_avgs > `r(p50)') if !missing(solv_avgs); 
gen good_sabove = good_rating*abmed_solv; 
gen bad_sabove = bad_rating*abmed_solv; 
* fix - the four sabove interactions below are now built from good_sabove and bad_sabove. They previously reused the complaint-based good_above and bad_above;
gen good_sabove_post = good_sabove*post; 
gen bad_sabove_post = bad_sabove*post; 
gen abmed_solv_post = abmed_solv*post; 
gen good_sabove_post_2009 = good_sabove*post_2009; 
gen bad_sabove_post_2009 = bad_sabove*post_2009; 
gen abmed_solv_post_2009 = abmed_solv*post_2009; 


** elig firm and solved rate vars; 


gen elig_firm_post_09_solv = elig_firm*post_2009*abmed_solv;

gen elig_firm_post_solv = elig_firm*post*abmed_solv;



gen elig_sabove = elig_firm*abmed_solv; 
gen elig_sabove_post_2009 = elig_sabove*post_2009; 

gen elig_sabove_post = elig_sabove*post; 


* later-period dummies and their interactions with the rating groups;
* post_14 is on after 2014, post_12 covers 2012 through 2014;
generate post_14 = (year > 2014);
foreach grp in good bad {;
	generate `grp'_rating_post_14 = `grp'_rating*post_14;
};

generate post_12 = inlist(year, 2012, 2013, 2014);
foreach grp in good bad {;
	generate `grp'_rating_post_12 = `grp'_rating*post_12;
};




** merge in quantitative ratings - yr -level; 
* brings in the listed score components by agency and year, keeping master-only and matched rows;
			
merge m:1 agency_id year using ../dta_secure/ratings_yrs.dta, keepusing(partial_qual_marks rp_partial_qual_marks slbfe_performance_rate rp_partial_audit_marks
lg_f1_renewal lg_f2_bank_guaranty lg_f3_book_a lg_f3_book_b lg_f3_book_c lg_f3_book_d lg_f3_receipt
 lg_f3_passport lg_f4_1a lg_f4_1b_nameboard lg_f4_1c_appearance lg_f4_2a_office_area
  lg_f4_2b_process_mgmt lg_f4_2c_equip lg_f5_agency_age lg_f6_i_office_staff lg_f6_ii_id_cards
   lg_f7_i_bio_data lg_f7_ii_awareness lg_g1_higher_ed lg_g2_advanced_ed lg_g3_ordinary_ed
    lg_g4_lower_ed lg_extra_bonus) gen(ratings_yrs_merge) keep(1 3);
			
		* median of partial_qual_marks across all 2009 rows, copied to every row of agencies that have a 2009 row (missing for the others);
		egen med_score2009_tmp = median(partial_qual_marks) if year == 2009;
		bys agency_id: egen med_score2009 = max(med_score2009_tmp);
			
		* the agency's own 2009 mark on every row;
		gen score2009_tmp = partial_qual_marks if year == 2009;
		bys agency_id: egen score2009 = max(score2009_tmp);
			
		* agency mean mark over years up to 2009, and the median of those means (taken over rows, not over agencies);
		bys agency_id: egen mean_score_pre2009_tmp = mean(partial_qual_marks) if year <= 2009;
		egen med_score_pre2009 = median(mean_score_pre2009_tmp);
			
		bys agency_id: egen mean_score_pre2009 = max(mean_score_pre2009_tmp);
		
			
			
		drop med_score2009_tmp score2009_tmp mean_score_pre2009_tmp;
		
			
		* NOTE(review): both dummies compare the current-row partial_qual_marks, not score2009 or mean_score_pre2009, with the median - confirm intent;
		* NOTE(review): a missing partial_qual_marks ranks above every number and is therefore coded 1 here;
		gen abmed_score2009 = (partial_qual_marks >= med_score2009);
		gen abmed_score_pre2009 = (partial_qual_marks >= med_score_pre2009);
				
		gen abmed_score2009_post2009 = abmed_score2009 * post_2009;
		gen score2009_post2009 = score2009 * post_2009;
		gen abmed_score_pre2009_post2009 = abmed_score_pre2009 * post_2009;
		gen score_pre2009_post2009 = mean_score_pre2009 * post_2009;
		
		


* random vars; 
* indicators for a strictly positive complaint count;
* fix - left missing when the count is missing. Stata ranks missing above every number, so the bare comparison coded missing counts as 1;

gen any_complaint = (complaints > 0) if !missing(complaints); 
label var any_complaint "Non-zero departure complaints"; 

gen any_fcomplaint = (filed_complaints > 0) if !missing(filed_complaints); 
label var any_fcomplaint "Non-zero filed complaints"; 


* create quality controls; 
* agency mean of partial_qual_marks over 2005-2008, copied to every row of the agency;


bysort agency_id: egen quality_controlz_0508 = mean(partial_qual_marks) if inlist(year, 2005, 2006, 2007, 2008);
bysort agency_id: egen quality_control_0508 = max(quality_controlz_0508);
drop quality_controlz_0508;


* more treatment vars; 
* post_200911 is on for years strictly between 2009 and 2012;


generate post_200911 = (year > 2009 & year < 2012);
foreach v in elig_firm good_rating bad_rating {;
	generate `v'_post_200911 = (`v'*post_200911);
};
	
	

** interact quality 05-08 with elig firm; 

* continuous; 
gen elig_firm_qc_post_2009 = elig_firm_post_2009 * quality_control_0508; 
gen qc_post_2009 = quality_control_0508*post_2009 ; 
gen elig_firm_qc = elig_firm*quality_control_0508;

* discrete; 
* split at the median taken over the 2008 rows;
* fix - left missing when the quality measure is missing (missing ranks above every number and was coded as high quality);

sum quality_control_0508 if year == 2008, det; 
gen qcm_0508 = (quality_control_0508 >= `r(p50)') if !missing(quality_control_0508); 


gen elig_firm_qcm_post_2009 = elig_firm_post_2009 * qcm_0508; 
gen qcm_post_2009 = qcm_0508*post_2009 ; 
gen elig_firm_qcm = elig_firm*qcm_0508;



** interact quality 09 with elig firm; 
* the agency's own 2009 mark, copied to every row of the agency;
gen quality_controlz_09 = partial_qual_marks if year == 2009;

bysort agency_id: egen quality_control_09 = max(quality_controlz_09);

drop quality_controlz_09; 

* continuous; 

gen elig_firm_qc09_post_2009 = elig_firm_post_2009 * quality_control_09; 
gen qc09_post_2009 = quality_control_09*post_2009 ; 
gen elig_firm_qc09 = elig_firm*quality_control_09;

* discrete; 
* NOTE(review): this split is strictly above the median while qcm_0508 uses at-or-above - confirm the asymmetry is intended;
* fix - same missing-value guard as for qcm_0508;

sum quality_control_09 if year == 2009, det; 
gen qcm_09 = (quality_control_09 > `r(p50)') if !missing(quality_control_09); 


gen elig_firm_qcm09_post_2009 = elig_firm_post_2009 * qcm_09; 
gen qcm09_post_2009 = qcm_09*post_2009 ; 
gen elig_firm_qcm09 = elig_firm*qcm_09;


gen elig_firm_qcm09_post = elig_firm_post * qcm_09; 
gen qcm09_post = qcm_09*post; 



* figure out predicted rating;
* linear fit of the 2012 score on the 2009 quality mark among eligible firms, then fitted values for every row with a 2009 mark;


reg performance_rate quality_control_09 if elig_firm == 1 & year == 2012, r; 
gen predicted_score = _b[_cons] + _b[quality_control_09]*quality_control_09 ; 

gen pred_score_post = predicted_score*post; 
gen pred_score_post_2009 = predicted_score*post_2009; 

gen pred_score_elig_post = predicted_score*post*elig_firm; 
gen pred_score_elig_post_2009 = predicted_score*post_2009*elig_firm; 

gen pred_score_elig = predicted_score*elig_firm;

* above/below median predicted rating; 
* median taken over the 2009 rows of eligible firms;

sum predicted_score if year == 2009 & elig_firm == 1, det; 

* fix - both dummies are left missing when there is no predicted score. Previously a missing score was coded high_score = 1 and low_score = 0 because missing ranks above every number;
gen high_score = (predicted_score > `r(p50)') if !missing(predicted_score); 
gen low_score = (predicted_score <= `r(p50)') if !missing(predicted_score); 


gen high_elig_post = high_score*post*elig_firm; 
gen low_elig_post = low_score*post*elig_firm; 


gen high_elig_post_2009 = high_score*post_2009*elig_firm; 
gen low_elig_post_2009 = low_score*post_2009*elig_firm; 

gen high_post = high_score*post; 
gen low_post = low_score*post; 

gen high_post_2009 = high_score*post_2009; 
gen low_post_2009 = low_score*post_2009; 


* create stars with predicted scores; 
* bands use the cutoffs 20, 35, 50, 60 and 80 with closed lower bounds;
* fix - the previous x.9 bounds left scores in (19.9, 20) style gaps inconsistent with the cutoffs, let a score between 34.9 and 35 (and likewise at 50 and 60) depend on statement order, and gave a score of exactly 80 no star at all;
* NOTE(review): rows without a predicted score keep pred_star = 0, as before - confirm that is wanted;

gen pred_star = 0;
replace pred_star = 1 if predicted_score >= 20 & predicted_score < 35; 
replace pred_star = 2 if predicted_score >= 35 & predicted_score < 50; 
replace pred_star = 3 if predicted_score >= 50 & predicted_score < 60; 
replace pred_star = 4 if predicted_score >= 60 & predicted_score < 80; 
replace pred_star = 5 if predicted_score >= 80 & predicted_score != .; 

gen pred_star_post = pred_star*post; 
gen pred_star_post_2009 = pred_star*post_2009; 


gen pred_star_elig_post = pred_star*post*elig_firm; 
gen pred_star_elig_post_2009 = pred_star*post_2009*elig_firm; 


* one dummy per predicted star level 0 to 4 (no dummy is built for level 5);
gen pred_star_0 = (pred_star == 0); 
gen pred_star_1 = (pred_star == 1); 
gen pred_star_2 = (pred_star == 2); 
gen pred_star_3 = (pred_star == 3); 
gen pred_star_4 = (pred_star == 4); 


gen ps_0_elig_post_2009 = pred_star_0*elig_firm*post_2009; 
gen ps_1_elig_post_2009 = pred_star_1*elig_firm*post_2009; 
gen ps_2_elig_post_2009 = pred_star_2*elig_firm*post_2009; 
gen ps_3_elig_post_2009 = pred_star_3*elig_firm*post_2009; 
gen ps_4_elig_post_2009 = pred_star_4*elig_firm*post_2009; 


gen ps_0_elig_post = pred_star_0*elig_firm*post; 
gen ps_1_elig_post = pred_star_1*elig_firm*post; 
gen ps_2_elig_post = pred_star_2*elig_firm*post; 
gen ps_3_elig_post = pred_star_3*elig_firm*post; 
gen ps_4_elig_post = pred_star_4*elig_firm*post; 



* 1. create variable for months and years; 
* month_year numbers the distinct (year, month) pairs present in the data 1, 2, 3, ... in sorted order;
* NOTE(review): the month codes used later in this file (49, 61, 73, 87, 109, ...) presume that code 1 is Jan 2005 and that no calendar month is absent from the data - confirm;


sort year month; 
egen month_year = group(year month); 



* 1. FIGURE OUT IF AGENCIES SWITCH ON AND OFF  -- month year level ; 

* a. figure out the first month where an agency had zero migrants;  
gen migrant_min_my = 1 if migrant == 0; 


* minimum month_year within the zero-migrant rows of each agency, then copied to every row of the agency;
bysort agency_id migrant_min_my: egen min_zero_yr_my = min(month_year); 
replace min_zero_yr_my = . if migrant_min_my == . ;
bysort agency_id: egen mig_min_my = max(min_zero_yr_my);
drop  min_zero_yr_my;
label var mig_min_my "The first month-year in which an agency sent zero migrants";

* b. figure out the last month year in which there were non-zero migrants; 

gen migrant_nz_my = 1 if migrant > 0 & migrant != . ; 
bysort agency_id migrant_nz_my: egen max_nz_yr_my = max(month_year); 
replace max_nz_yr_my = . if migrant_nz_my == . ;
bysort agency_id: egen mig_nz_my = max(max_nz_yr_my);
label var mig_nz_my "The last month year in which an agency sent non-zero migrants";
drop max_nz_yr_my;

* c. which agencies had a non-zero month AFTER a zero month; 
* fix - require a non-missing last non-zero month. Missing ranks above every number, so agencies that never sent a migrant were flagged as switching back on;


gen groundhogs_my = (mig_nz_my > mig_min_my & mig_nz_my != .);
label var groundhogs_my "Agencies which send zero migrants and then send non-zero migrants thereafter";
* turns out there are 309 agencies who recruit noone and then switch back on (count from the original run - recheck after the missing-value guard); 

*d. update --- easier way to calculate firm exit; 
* figure out the last non-zero month year; 
* all month-years thereafter coded as 1 (i.e. firm dead); 
* when mig_nz_my is missing (agency never sent a migrant) the comparison is false, so firm_exit is 0 on all of its rows;

gen firm_exit = (month_year > mig_nz_my); 

label var firm_exit "coded as 1 if the firm no longer sends migrants";


* 73 corresponds to jan 2011, 109 corresponds to jan 2014; 
* agency-level flags - last non-zero month falls before the stated month;

gen exit_11 = (mig_nz_my < 73); 

gen exit_14 = (mig_nz_my < 109); 


label var exit_11 "firm exited by jan 2011"; 
label var exit_14 "firm exited by jan 2014"; 

** firm exit variable with minimum of 1 yr and 6 months ; 
* NOTE(review): the caps 120, 126 and 108 presumably assume the panel ends at month 132 (12, 6 and 24 months before the end) - confirm against the data;

gen firm_exit_1yr = (month_year > mig_nz_my & mig_nz_my <= 120); 

label var firm_exit_1yr "firm that has zero migrants for at least 12 consecutive months";



gen firm_exit_6mo = (month_year > mig_nz_my & mig_nz_my <= 126); 

label var firm_exit_6mo "firm that has zero migrants for at least 6 consecutive months";



gen firm_exit_2yr = (month_year > mig_nz_my & mig_nz_my <= 108); 

label var firm_exit_2yr "firm that has zero migrants for at least 24 consecutive months";


* 2. FIGURE OUT WHICH AGENCIES SWITCH OFF AND THEN STAY THAT WAY FOREVER;

*a. figure out the last month year where an agency had zero migrants; 

* maximum month_year within the zero-migrant rows of each agency, then copied to every row of the agency;
bysort agency_id migrant_min_my: egen max_zero_yr_my = max(month_year); 
replace max_zero_yr_my = . if migrant_min_my == . ;
bysort agency_id: egen mig_max_my = max(max_zero_yr_my);
label var mig_max_my "The last month year in which agency sent zero migrants";
* dead = the last zero month comes after the last non-zero month, with both dates observed;
gen dead_my = mig_max_my > mig_nz_my & mig_max_my != . & mig_nz_my != . ;
label var dead_my "Agencies who never send migrants after their last zero month year"; 
* NOTE(review): 49 presumably stands for jan 2009 under the month_year numbering - confirm;
gen dead_09_my = (dead_my == 1 & mig_min_my > 49);


*** interactions with pre-program measures of quality; 
* every measure below is computed per agency over 2005-2008 and then copied to all rows of the agency;

local yrs0508 "2005,2006,2007,2008";

** complaint settlement; 

*a. solve timing; 
bys agency_id: egen solvmz_0508 = mean(solv_months) if inlist(year,`yrs0508');
bys agency_id: egen solvm_0508 = max(solvmz_0508);
drop solvmz_0508;
label variable solvm_0508 "Average time to solve complaints, 05-08";

*b. solve rate; 
bys agency_id: egen solvratz_0508 = mean(solved_rate) if inlist(year,`yrs0508');
bys agency_id: egen solvrat_0508 = max(solvratz_0508);
drop solvratz_0508;
label variable solvrat_0508 "Average ever solve rate, 05-08";

*c. complaint rate - departures; 
bys agency_id: egen compintz_0508 = mean(comp_int) if inlist(year,`yrs0508');
bys agency_id: egen compint_0508 = max(compintz_0508);
drop compintz_0508;
label variable compint_0508 "Average (departure) complaint intensity, 05-08";

*d. complaint rate - filed; 
bys agency_id: egen fcompintz_0508 = mean(filed_comp_int) if inlist(year,`yrs0508');
bys agency_id: egen fcompint_0508 = max(fcompintz_0508);
drop fcompintz_0508;
label variable fcompint_0508 "Average (filed) complaint intensity, 05-08";

*f. # of filed complaints; 
bys agency_id: egen fcompintz_0508 = sum(filed_complaints) if inlist(year,`yrs0508');
bys agency_id: egen filed_complaints_0508 = max(fcompintz_0508);
drop fcompintz_0508;
label variable filed_complaints_0508 "Total (filed) complaints, 05-08";

*g. # of departure complaints; 
bys agency_id: egen compintz_0508 = sum(complaints) if inlist(year,`yrs0508');
bys agency_id: egen complaints_0508 = max(compintz_0508);
drop compintz_0508;
label variable complaints_0508 "Total (departure) complaints, 05-08";


* avg migrant recruitment; 
* agency mean of migrant over the 2005-2008 rows, copied to every row of the agency;
bysort agency_id: egen migrantz_0508 = mean(migrant) if inlist(year,2005,2006,2007,2008); 
bysort agency_id: egen migrants_0508 = max(migrantz_0508);
drop migrantz_0508;

* fix - label the variable created here. The old line re-labelled complaints_0508 and left migrants_0508 unlabelled;
label var migrants_0508 "Average migrant recruitment, 05-08"; 


** changes for working with monthly data; 

* original note: sept 2009 is the 57th month (4yrs*12mos + 9mos), but the cutoff was moved to the first month of 2010, i.e. month 61;
generate postm_2009 = (month_year >= 61);

* original note: after feb 2012 the awards are public - coded as month 87 onward;
generate postm = (month_year >= 87);

* window between the two cutoffs, months 61 through 86;
generate postmp_2009 = (month_year >= 61 & month_year < 87);


* eligibility interacted with each monthly period dummy;
foreach per in postm_2009 postm postmp_2009 {;
	generate elig_firm_`per' = elig_firm*`per';
};

* predicted score interacted with each period dummy, alone and with eligibility;
foreach per in postm postm_2009 postmp_2009 {;
	generate pred_score_`per' = predicted_score*`per';
};

foreach per in postm postm_2009 postmp_2009 {;
	generate pred_score_elig_`per' = predicted_score*`per'*elig_firm;
};



** competition variables - villages from which agencies recruit; 
* agency-level merge keeping master-only and matched rows, so the merged variables are missing for unmatched agencies;

merge m:1 agency_id using ../dta_secure/agency_comp.dta, gen(comp_merge) keep(1 3); 



gen compv = agencies_per_vill;


* merge in agency distance - agencies located close to head office using GPS ; 

merge m:1 agency_id using ../dta_secure/agency_distance.dta, gen(distance_merge) keep(1 3); 


* cluster size = ineligible plus eligible agencies within each radius;
gen clust_halfkm = inelig_within_halfkm + elig_within_halfkm;
gen clust_1km = inelig_within_1km + elig_within_1km ;
gen clust_5km = inelig_within_5km + elig_within_5km ;
gen clust_10km = inelig_within_10km + elig_within_10km ;
gen clust_100ft = inelig_within_100ft + elig_within_100ft ;
gen clust_250ft = inelig_within_250ft + elig_within_250ft ;
gen clust_500ft = inelig_within_500ft + elig_within_500ft ;
gen elig_100ft = elig_within_100ft;
gen elig_250ft = elig_within_250ft; 
gen elig_500ft = elig_within_500ft;
gen elig_halfkm = elig_within_halfkm;
gen elig_1km = elig_within_1km;
gen elig_5km = elig_within_5km;


* fix - the any-neighbour dummy is left missing when the count is missing. Missing ranks above every number, so unmatched agencies were coded as having an eligible neighbour;
foreach x of varlist elig_100ft elig_250ft elig_500ft elig_halfkm elig_1km elig_5km { ; 

gen any_`x' = (`x' >= 1) if !missing(`x');
gen any_`x'_postm_2009 = any_`x' * postm_2009; 

};





lab var clust_halfkm "no. of agencies in a 0.5 km radius"; 
lab var clust_1km "no. of agencies in a 1 km radius"; 
lab var clust_5km "no. of agencies in a 5 km radius"; 
lab var clust_10km "no. of agencies in a 10 km radius"; 
lab var clust_100ft "no. of agencies in a 100ft radius"; 
lab var clust_250ft "no. of agencies in a 250ft radius"; 
lab var clust_500ft "no. of agencies in a 500ft radius"; 


* merge in agency owner ethnicity data; 
* agency-level merge keeping master-only and matched rows;

merge m:1 agency_id using ../dta_secure/agency_ethnicity.dta, keep(1 3) gen(eth_merge);

gen elig_100ft_e = elig_within_100ft_e;
gen elig_250ft_e = elig_within_250ft_e; 
gen elig_500ft_e = elig_within_500ft_e;
gen elig_halfkm_e = elig_within_halfkm_e;
gen elig_1km_e = elig_within_1km_e;
gen elig_5km_e = elig_within_5km_e;


* fix - left missing when the co-ethnic count is missing (missing ranks above every number and was coded as 1);
foreach x of varlist elig_100ft elig_250ft elig_500ft elig_halfkm elig_1km elig_5km { ; 

gen any_`x'_e = (`x'_e >= 1) if !missing(`x'_e);
gen any_`x'_e_postm_2009 = any_`x'_e * postm_2009; 

};


* fix - labels now show the radius itself. They used to embed the variable name followed by ft, e.g. within elig_1km ft;
* radii is listed in the same order as the variables in the loop;
local radii "100ft 250ft 500ft 0.5km 1km 5km";
local k = 0;
foreach x of varlist elig_100ft elig_250ft elig_500ft elig_halfkm elig_1km elig_5km { ; 

local ++k;
local rad : word `k' of `radii';
lab var `x' "number of eligible agencies within `rad'";
lab var any_`x' "any eligible agency within `rad'";
lab var `x'_e "number of co-ethnic eligible agencies within `rad'";
lab var any_`x'_e "any co-ethnic eligible agency within `rad'";

};


** merge in HH index; 

merge m:1 agency_vil_id using ../dta_secure/hh_index.dta, keep(1 3) gen(hh_index_merge);



*i. interactions with treatment vars; 
* for each measure: continuous interactions, then a split at the median over all rows, with interactions for the high and the low half;

foreach x of varlist solvm_0508 solvrat_0508 compint_0508 fcompint_0508 compv clust_halfkm clust_1km clust_5km clust_10km clust_100ft clust_250ft clust_500ft
   elig_100ft hh_index_0708 {; 

gen `x'_elig_post = `x'*post*elig_firm; 
gen `x'_elig_post_2009 = `x'*post_2009*elig_firm;
gen `x'_post = `x'*post; 
gen `x'_post_2009 = `x'*post_2009; 
gen `x'_elig_firm = `x'*elig_firm; 

* fix - both halves are left missing when the measure is missing. Missing ranks above every number, so it was previously coded high = 1 and low = 0;
sum `x', det; 
gen `x'_high = (`x' > `r(p50)') if !missing(`x'); 
gen `x'_low = (`x' <= `r(p50)') if !missing(`x'); 

gen `x'_hpost = `x'_high*post; 
gen `x'_hpost_2009 = `x'_high*post_2009; 

gen `x'_helig_post = `x'_high*elig_firm*post; 
gen `x'_helig_post_2009 = `x'_high*elig_firm*post_2009; 

gen `x'_lpost = `x'_low*post; 
gen `x'_lpost_2009 = `x'_low*post_2009; 

gen `x'_lelig_post = `x'_low*elig_firm*post; 
gen `x'_lelig_post_2009 = `x'_low*elig_firm*post_2009; 





};



* above/below median predicted rating; 
* monthly-period versions of the high and low predicted-score interactions;

foreach lvl in high low {;
	generate `lvl'_elig = `lvl'_score*elig_firm;
};

* triple interactions - score half by period by eligibility;
foreach per in postm postm_2009 postmp_2009 {;
	foreach lvl in high low {;
		generate `lvl'_elig_`per' = `lvl'_score*`per'*elig_firm;
	};
};

* double interactions - score half by period;
foreach per in postm postm_2009 postmp_2009 {;
	foreach lvl in high low {;
		generate `lvl'_`per' = `lvl'_score*`per';
	};
};


* 2. import stuff from strategic_months dofile ; 

* first, figure out total migrants recruited between 01/1/09 and 08/31/09;


bysort agency_id: egen migrantz_200909  = sum(migrant) if inlist(month, 1,2,3,4,5,6,7,8) & year == 2009; 
bysort agency_id: egen migrants_200909 = max(migrantz_200909); 
drop migrantz_200909;

label var migrants_200909 "Number of migrants recruited in first 8 months of 2009";

/*drop if migrants_200909 == 0 ; */
gen elig_200909 = (migrants_200909 >= 100); 

label var elig_200909 "Agency recruited >= 100 by 8/31/2009"; 


gen migs_09_100_125 = (migrants_200909 >= 100 & migrants_200909 <= 125); 
gen migs_09_75_99 = (migrants_200909 >= 75 & migrants_200909 < 100); 
gen migs_09_50_99 = (migrants_200909 >= 50 & migrants_200909 < 100); 

label var migs_09_100_125 "Agency recruited between 100 and 125 by 8/31/2009"; 
label var migs_09_75_99 "Agency recruited between 75 and 99 by 8/31/2009"; 
label var migs_09_50_99 "Agency recruited between 50 and 99 by 8/31/2009"; 


* first'ish, figure out total migrants recruited in 2009; 
* the sum is filled on the 2009 rows and then spread to every row of the agency, so it is missing for an agency with no 2009 rows; 

bysort agency_id: egen migrantz_200912  = sum(migrant) if year == 2009; 
bysort agency_id: egen migrants_200912 = max(migrantz_200912); 
drop migrantz_200912;

label var migrants_200912 "Number of migrants recruited in 2009"; 


* eligibility indicator: at least 100 migrants by 12/31/2009; 
* Stata treats missing as larger than any number, so the guard keeps an agency with a missing 2009 total from being coded as eligible; 
gen elig_200912 = (migrants_200912 >= 100) if !missing(migrants_200912); 

label var elig_200912 "Agency recruited >= 100 by 12/31/2009"; 


* total migrants recruited in the last four months (Sep-Dec) of 2009; 
* step 1 fills the agency total on the Sep-Dec 2009 rows only, step 2 copies it to every row of the agency; 

by agency_id, sort: egen migrantz_200909_12 = total(migrant) if year == 2009 & inlist(month,9,10,11,12); 
by agency_id, sort: egen migrants_200909_12 = max(migrantz_200909_12); 
drop migrantz_200909_12;

label variable migrants_200909_12 "Number of migrants recruited in last 4 months of 2009"; 



* second, figure out avg. complaint rate between 01/1/09 and 08/31/09;
* only agency-months with a nonmissing, positive migrant count enter the mean; 

bysort agency_id: egen comp_intz_200909  = mean(comp_int) if inlist(month, 1,2,3,4,5,6,7,8) & year == 2009 & migrant != . & migrant > 0 ; 
bysort agency_id: egen comp_int_200909 = max(comp_intz_200909); 
drop comp_intz_200909;

label var comp_int_200909 "Complaint rate for first 8 months of 2009"; 


* indicator for an above-median complaint rate; 
* missing compares as larger than any number in Stata, so agencies without a Jan-Aug 2009 complaint rate are left missing instead of being coded as high; 
* NOTE(review): the median is taken over all rows in memory, so agencies with more rows carry more weight - confirm this is intended; 
sum comp_int_200909, det; 
gen high_comp_int = (comp_int_200909 > `r(p50)') if !missing(comp_int_200909);


* third, figure out avg. complaint solve time between 01/1/09 and 08/31/09;
* only agency-months with a nonmissing, positive complaint count enter the mean; 
* the nonmissing check matters because a missing count would otherwise pass the > 0 test; 


bysort agency_id: egen solv_monthsz_200909  = mean(solv_months) if inlist(month, 1,2,3,4,5,6,7,8) & year == 2009 & complaints != . & complaints > 0 ; 
bysort agency_id: egen solv_months_200909 = max(solv_monthsz_200909); 
drop solv_monthsz_200909;


label var solv_months_200909 "Avg. complaint solve time for first 8 months of 2009"; 


* indicator for an above-median solve time; 
* agencies without a Jan-Aug 2009 solve time are left missing instead of being coded as high, since missing compares as larger than any number; 
sum solv_months_200909, det; 
gen high_solv_months = (solv_months_200909 > `r(p50)') if !missing(solv_months_200909);

* fourth, per-month recruitment in 2009: period total divided by the number of months in the period; 

local n_months_early 8; 
local n_months_late 4; 

gen migrant_rate_200909 = migrants_200909/`n_months_early'; 
gen migrant_rate_200909_12 = migrants_200909_12/`n_months_late'; 

label variable migrant_rate_200909 "Monthly recruitment rate in first 8 months of 2009"; 
label variable migrant_rate_200909_12 "Monthly recruitment rate in last 4 months of 2009"; 



* fifth, the same counts and monthly rates for 2008, used as a placebo year; 
* each total is first filled on the rows of the period and then copied to every row of the agency; 

* Jan-Aug 2008; 
by agency_id, sort: egen migrantz_200809 = total(migrant) if year == 2008 & inlist(month, 1,2,3,4,5,6,7,8); 
by agency_id, sort: egen migrants_200809 = max(migrantz_200809); 
drop migrantz_200809;

label variable migrants_200809 "Number of migrants recruited in first 8 months of 2008";

* all of 2008; 
by agency_id, sort: egen migrantz_200812 = total(migrant) if year == 2008; 
by agency_id, sort: egen migrants_200812 = max(migrantz_200812); 
drop migrantz_200812;

label variable migrants_200812 "Number of migrants recruited in 2008"; 

* Sep-Dec 2008; 
by agency_id, sort: egen migrantz_200809_12 = total(migrant) if year == 2008 & inlist(month,9,10,11,12); 
by agency_id, sort: egen migrants_200809_12 = max(migrantz_200809_12); 
drop migrantz_200809_12;

label variable migrants_200809_12 "Number of migrants recruited in last 4 months of 2008"; 

* monthly rates: period total over the number of months in the period; 
gen migrant_rate_200809 = migrants_200809/8; 
gen migrant_rate_200809_12 = migrants_200809_12/4; 

label variable migrant_rate_200809 "Monthly recruitment rate in first 8 months of 2008"; 
label variable migrant_rate_200809_12 "Monthly recruitment rate in last 4 months of 2008"; 





*i. interactions with treatment vars; 
* for every listed measure, build interactions with the three post indicators, with and without eligibility; 
* the measure itself and its _high and _low companions (created outside this section) are each interacted; 

local post_inds postm postm_2009 postmp_2009; 

foreach v of varlist solvm_0508 solvrat_0508 compint_0508 fcompint_0508 compv  
clust_halfkm clust_1km clust_5km clust_10km clust_100ft clust_250ft clust_500ft 
 elig_100ft hh_index_0708 {; 

	* measure x post x eligibility; 
	foreach p of local post_inds {; 
		gen `v'_elig_`p' = `v'*`p'*elig_firm; 
	};

	* measure x post; 
	foreach p of local post_inds {; 
		gen `v'_`p' = `v'*`p'; 
	};

	* high indicator x post; 
	foreach p of local post_inds {; 
		gen `v'_h`p' = `v'_high*`p'; 
	};

	* high indicator x eligibility x post; 
	foreach p of local post_inds {; 
		gen `v'_helig_`p' = `v'_high*elig_firm*`p'; 
	};

	* low indicator x post; 
	foreach p of local post_inds {; 
		gen `v'_l`p' = `v'_low*`p'; 
	};

	* low indicator x eligibility x post; 
	foreach p of local post_inds {; 
		gen `v'_lelig_`p' = `v'_low*elig_firm*`p'; 
	};

};



* generate quarters; 
* bin the running month index month_year into consecutive 3-month intervals with left endpoints 1, 4, ..., 130; 
* qrtrz holds the left endpoint of the interval, and is missing for month_year values below 1 or at or above 133; 

egen qrtrz = cut(month_year), at(1(3)133) ; 

sort qrtrz;

* qrtr renumbers the distinct qrtrz values as consecutive integers starting at 1; 

egen qrtr = group(qrtrz);



** diagnostics for migrant trends; 

* row means of the yearly migrant variables over 07-09 and over 08-09; 

egen migrant_0709 = rowmean(migrant_07 migrant_08 migrant_09); 
egen migrant_0809 = rowmean(migrant_08 migrant_09); 

label variable migrant_0709 "yr level mean between 07, 08 and 09"; 
label variable migrant_0809 "yr level mean between 08 and 09"; 

* window indicators around the averages: 50-150, 25-175 and 0-200, endpoints included; 
* a missing average falls outside every window and is coded 0; 

foreach span in 0709 0809 {; 
	gen cutoff_1_`span' = inrange(migrant_`span', 50, 150); 
	gen cutoff_2_`span' = inrange(migrant_`span', 25, 175); 
	gen cutoff_3_`span' = inrange(migrant_`span', 0, 200); 
};

* firm age observed on the 2009 rows, copied to every row of the agency; 

gen firm_agez_09 = firm_age_yr if year == 2009; 
by agency_id, sort: egen firm_age_09 = max(firm_agez_09);

* number of rows with a nonmissing migrant count per agency, and a flag for agencies that have all 132 of them; 

by agency_id, sort: egen months = count(migrant); 
gen balanced_05_15 = (months == 132);

* tag one row per distinct agency_id and year combination; 

egen ayr_tag = tag(agency_id year); 


** post after 12/31/2008; 

* post indicators defined on the running month index month_year; 
* NOTE(review): the earlier comment here said dec 2007 = 48th month, but the section header and the 132-month panel behind balanced_05_15 suggest month 48 is December 2008 and month 49 is January 2009 - confirm; 
* NOTE(review): month 87 would be March 2012 if month 1 is January 2005 - confirm what this cutoff represents; 

local first_post_month_2009 49; 
local first_post_month 87; 

gen postr_2009 = (month_year >= `first_post_month_2009'); 
gen postr = (month_year >= `first_post_month'); 

* eligibility x post; 
gen elig_firm_postr_2009 = elig_firm*postr_2009;
gen elig_firm_postr = elig_firm*postr;



* figure out who to exclude (i.e. agencies who could've altered); 
* flag agencies that were below the 100 threshold by 8/31/2009 but at or above it by 12/31/2009; 

gen jerks = cond(elig_200909 == 0 & elig_200912 == 1, 1, 0);


** merge in quality of job orders; 
* m:1 merge on license_no, month and year that brings in only the variables listed in keepusing(); 
* no keep() or gen() option is given, so rows found only in the using file are added to the data and the match indicator is stored in the default _merge variable; 

merge m:1 license_no month year using ../dta_secure/firm_joborder_emp_month.dta,  
keepusing(mean_jo_emp_comp_rate_pre2009 mean_jo_emp_comp_rate med_jo_emp_comp_rate_pre2009
med_jo_emp_comp_rate bad_emp_share bad_emp_pre2009_share good_emp_share good_emp_pre2009_share new_emp_share agency_inactive_jo_2009

num_vac_req_badpre2009_emp num_vac_app_badpre2009_emp num_vac_sent_badpre2009_emp num_vac_req_badpre2009_25_emp 
num_vac_app_badpre2009_25_emp num_vac_sent_badpre2009_25_emp num_vac_req_badpre2009_10_emp num_vac_app_badpre2009_10_emp 
num_vac_sent_badpre2009_10_emp num_vac_req_good_emp num_vac_app_good_emp num_vac_sent_good_emp num_vac_req_goodpre2009_emp
num_vac_app_goodpre2009_emp num_vac_sent_goodpre2009_emp num_vac_req_goodpre2009_25_emp num_vac_app_goodpre2009_25_emp 
num_vac_sent_goodpre2009_25_emp num_vac_req_goodpre2009_10_emp num_vac_app_goodpre2009_10_emp num_vac_sent_goodpre2009_10_emp
num_jo_bad_emp_pre2009_25 num_jo_bad_emp_pre2009_10 num_jo_good_emp_pre2009_25 num_jo_good_emp_pre2009_10 bad_emp_pre2009_share_25
bad_emp_pre2009_share_10 good_emp_pre2009_share_25 good_emp_pre2009_share_10 num_jo_good_emp_pre2009 num_jo_bad_emp_pre2009 );



* employer-type shares are set to zero on rows with exactly zero job orders; 
foreach sharevar of varlist bad_emp_share bad_emp_pre2009_share good_emp_share good_emp_pre2009_share new_emp_share {; 
	replace `sharevar' = 0 if num_job_orders == 0; 
};


* job-order complaint rates are set to missing on rows where the job order count is zero or missing; 
foreach ratevar of varlist mean_jo_emp_comp_rate_pre2009 mean_jo_emp_comp_rate med_jo_emp_comp_rate_pre2009
med_jo_emp_comp_rate {; 
	replace `ratevar' = . if (num_job_orders == 0 | num_job_orders == .) ;
};


* good-employer shares and counts: a missing value becomes zero whenever the job order count itself is nonmissing; 
foreach goodvar of varlist good_emp_pre2009_share_10 good_emp_pre2009_share_25 good_emp_pre2009_share
num_jo_good_emp_pre2009 num_jo_good_emp_pre2009_10 num_jo_good_emp_pre2009_25{;
	replace `goodvar' = 0 if num_job_orders != . & `goodvar' == . ; 
}; 


** merge in quantitative ratings - month -level; 
* m:1 merge on agency_id and month_year that brings in partial_qual_marks_mn only; 
* keep(1 3) retains master-only and matched rows, and the match indicator is stored in ratings_mn_merge; 
			
merge m:1 agency_id month_year using ../dta_secure/ratings_mn.dta, keepusing(partial_qual_marks_mn) keep(1 3) gen(ratings_mn_merge);	


* drop rows with a missing agency_id; 
* NOTE(review): presumably these are rows added from the using file by the job-order merge, which has no keep() restriction - confirm; 
drop if agency_id == . ; 

* merge in renewal data; 
* 1:1 merge on agency_id, year and month that keeps master-only and matched rows, with the match indicator stored in renewal_merge; 

merge 1:1 agency_id year month using ../dta_secure/renewal_al.dta, keep(1 3) gen(renewal_merge);


* a missing renewal count on a row with a positive migrant count is treated as zero renewals, not as missing; 
* each count is then divided by the migrant count to get a renewal share; 

foreach renewvar of varlist renew_agency renew_agency_emp renew_emp renew_self renew_emp_or_self {; 

	replace `renewvar' = 0 if migrant != . & migrant > 0 & `renewvar' == . ; 

	gen `renewvar'_share = `renewvar'/migrant; 

};

label variable renew_agency_share "share of migrants who renew through same agency"; 
label variable renew_agency_emp_share "share of migrants who renew through same agency and same employer"; 
label variable renew_emp_share "share of migrants who renew through same employer"; 
label variable renew_self_share "share of migrants who renew through self through same sector and country"; 
label variable renew_emp_or_self_share "share of migrants who renew to same employer or self"; 

* misc; 
* inverse hyperbolic sine transforms of usd_salary and usd_salary_d, which the labels below describe as revenue; 

gen rev_sinh = asinh(usd_salary); 
gen rev_d_sinh = asinh(usd_salary_d); 


* labels fixed to spell hyperbolic correctly; 
label var rev_sinh "Inverse hyperbolic sine of revenue";
label var rev_d_sinh "Inverse hyperbolic sine of revenue - 2015 dollars";


** merge in migrant quality measures; 
* 1:1 merge on agency_id, month and year that keeps master-only and matched rows, with the match indicator stored in complainants_merge; 


merge 1:1 agency_id month year using ../dta_secure/agency_mn_complainants.dta, keep(1 3) gen(complainants_merge);


* rows with a missing pre_complainants after the merge are set to zero; 
replace pre_complainants = 0 if pre_complainants == . ;

label var pre_complainants "number of migrants who made complaints prior to 2010";



* write the result as the step-4 file, overwriting any existing copy; 
save ../dta_secure/4_firm_mn.dta, replace;

